A short description of the post.
packages = c('sp', 'rgdal', 'spNetwork', 'tmap', 'maptools', 'sf', 'raster', 'spatstat', 'tidyverse', 'plotly', 'ggthemes')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
train_services_sf <- st_read(dsn="data/geospatial", layer="MRTLRTStnPtt")
Reading layer `MRTLRTStnPtt' from data source
`C:\darrylkwok\IS415_blog\_posts\2021-09-15-take-home-exercise-2\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 171 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 6138.311 ymin: 27555.06 xmax: 45254.86 ymax: 47854.2
Projected CRS: SVY21
mpsz_sf <- st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")
Reading layer `MP14_SUBZONE_WEB_PL' from data source
`C:\darrylkwok\IS415_blog\_posts\2021-09-15-take-home-exercise-2\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
st_crs(train_services_sf)
Coordinate Reference System:
User input: SVY21
wkt:
PROJCRS["SVY21",
BASEGEOGCRS["SVY21[WGS84]",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ID["EPSG",6326]],
PRIMEM["Greenwich",0,
ANGLEUNIT["Degree",0.0174532925199433]]],
CONVERSION["unnamed",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["(E)",east,
ORDER[1],
LENGTHUNIT["metre",1,
ID["EPSG",9001]]],
AXIS["(N)",north,
ORDER[2],
LENGTHUNIT["metre",1,
ID["EPSG",9001]]]]
st_crs(mpsz_sf)
Coordinate Reference System:
User input: SVY21
wkt:
PROJCRS["SVY21",
BASEGEOGCRS["SVY21[WGS84]",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ID["EPSG",6326]],
PRIMEM["Greenwich",0,
ANGLEUNIT["Degree",0.0174532925199433]]],
CONVERSION["unnamed",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["(E)",east,
ORDER[1],
LENGTHUNIT["metre",1,
ID["EPSG",9001]]],
AXIS["(N)",north,
ORDER[2],
LENGTHUNIT["metre",1,
ID["EPSG",9001]]]]
We can see from these 2 st_crs() outputs that, the projections are in SVY21, however the EPSG code is 9001. The correct EPSG code is 3414.
Therefore, we will assign the correct EPSG code.
train_services_sf <- st_set_crs(train_services_sf, 3414)
mpsz_sf <- st_set_crs(mpsz_sf, 3414)
Check the crs of the 2 geospatial data again to confirm the changes
st_crs(train_services_sf)
Coordinate Reference System:
User input: EPSG:3414
wkt:
PROJCRS["SVY21 / Singapore TM",
BASEGEOGCRS["SVY21",
DATUM["SVY21",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4757]],
CONVERSION["Singapore Transverse Mercator",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Cadastre, engineering survey, topographic mapping."],
AREA["Singapore - onshore and offshore."],
BBOX[1.13,103.59,1.47,104.07]],
ID["EPSG",3414]]
st_crs(mpsz_sf)
Coordinate Reference System:
User input: EPSG:3414
wkt:
PROJCRS["SVY21 / Singapore TM",
BASEGEOGCRS["SVY21",
DATUM["SVY21",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4757]],
CONVERSION["Singapore Transverse Mercator",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Cadastre, engineering survey, topographic mapping."],
AREA["Singapore - onshore and offshore."],
BBOX[1.13,103.59,1.47,104.07]],
ID["EPSG",3414]]
Check if the geometries are valid for the train_services_sf
From the above output, we can see that there are no invalid geometries for the train_services_sf
Check if the geometries are valid for the mpsz_sf
From the above output, we can see that there are 9 invalid geometries in the mpsz data
Therefore, we will handle the invalid geometries and check if the changes are made.
tmap_mode('view')
tm_shape(mpsz_sf) +
tm_polygons() +
tm_shape(train_services_sf) +
tm_dots(col="red", size=0.1)
tmap_mode('plot')
june_2019 <- read_csv("data/aspatial/30062019.csv")
june_2021 <- read_csv("data/aspatial/30062021.csv")
hotels <- read_csv("data/aspatial/hotels.csv")
tourism <- read_csv("data/aspatial/tourism.csv")
glimpse(june_2019)
Rows: 8,293
Columns: 16
$ id <dbl> 49091, 50646, 56334, 71609, 7~
$ name <chr> "COZICOMFORT LONG TERM STAY R~
$ host_id <dbl> 266763, 227796, 266763, 36704~
$ host_name <chr> "Francesca", "Sujatha", "Fran~
$ neighbourhood_group <chr> "North Region", "Central Regi~
$ neighbourhood <chr> "Woodlands", "Bukit Timah", "~
$ latitude <dbl> 1.44255, 1.33235, 1.44246, 1.~
$ longitude <dbl> 103.7958, 103.7852, 103.7967,~
$ room_type <chr> "Private room", "Private room~
$ price <dbl> 81, 80, 68, 200, 92, 102, 203~
$ minimum_nights <dbl> 180, 90, 6, 1, 1, 1, 1, 7, 30~
$ number_of_reviews <dbl> 1, 18, 20, 12, 20, 35, 23, 15~
$ last_review <date> 2013-10-21, 2014-12-26, 2015~
$ reviews_per_month <dbl> 0.01, 0.28, 0.21, 0.13, 0.21,~
$ calculated_host_listings_count <dbl> 2, 1, 2, 9, 9, 9, 9, 1, 4, 4,~
$ availability_365 <dbl> 365, 365, 365, 353, 353, 348,~
glimpse(june_2021)
Rows: 4,238
Columns: 16
$ id <dbl> 49091, 50646, 56334, 71609, 7~
$ name <chr> "COZICOMFORT LONG TERM STAY R~
$ host_id <dbl> 266763, 227796, 266763, 36704~
$ host_name <chr> "Francesca", "Sujatha", "Fran~
$ neighbourhood_group <chr> "North Region", "Central Regi~
$ neighbourhood <chr> "Woodlands", "Bukit Timah", "~
$ latitude <dbl> 1.44129, 1.33432, 1.44041, 1.~
$ longitude <dbl> 103.7957, 103.7852, 103.7967,~
$ room_type <chr> "Private room", "Private room~
$ price <dbl> 81, 80, 67, 177, 81, 81, 52, ~
$ minimum_nights <dbl> 180, 90, 6, 90, 90, 90, 14, 1~
$ number_of_reviews <dbl> 1, 18, 20, 20, 24, 48, 20, 13~
$ last_review <date> 2013-10-21, 2014-07-08, 2015~
$ reviews_per_month <dbl> 0.01, 0.22, 0.16, 0.29, 0.34,~
$ calculated_host_listings_count <dbl> 2, 1, 2, 4, 4, 4, 50, 50, 7, ~
$ availability_365 <dbl> 365, 365, 365, 365, 365, 365,~
glimpse(hotels)
Rows: 422
Columns: 9
$ NAME <chr> "Jayleen Clarke Quay Hotel", "JEN Singapor~
$ ADDRESSPOSTALCODE <dbl> 59390, 238858, 249716, 399041, 238485, 399~
$ ADDRESSSTREETNAME <chr> "25 New Bridge Road", "277 Orchard Road , ~
$ HYPERLINK <chr> "jayleenclarkequay@gmail.com", "singaporeo~
$ TOTALROOMS <dbl> 20, 499, 565, 42, 81, 33, 17, 634, 56, 451~
$ KEEPERNAME <chr> "James Federick Chong Kah Yean", "Kuok Oon~
$ Lat <dbl> 1.288715, 1.300510, 1.304271, 1.311586, 1.~
$ Lng <dbl> 103.8475, 103.8392, 103.8239, 103.8775, 10~
$ ICON_NAME <chr> "hotel.gif", "hotel.gif", "hotel.gif", "ho~
glimpse(tourism)
Rows: 107
Columns: 17
$ NAME <chr> "Chinatown Heritage Centre, Singapore", "T~
$ DESCRIPTION <chr> "Experience how Singapore’s early Chinese ~
$ ADDRESSSTREETNAME <chr> "48 Pagoda Street", "158 Telok Ayer Street~
$ HYPERLINK <chr> "http://www.singaporechinatown.com.sg/", "~
$ PHOTOURL <chr> "www.yoursingapore.com/content/dam/desktop~
$ URL_PATH <chr> "www.yoursingapore.com/en/see-do-singapore~
$ IMAGE_ALT_TEXT <chr> "Learn more about local Chinese culture at~
$ PHOTOCREDITS <chr> "Joel Chua DY", "Joel Chua DY", "Joel Chua~
$ LASTMODIFIED <dttm> 2015-11-02 02:16:52, 2015-11-02 02:20:57,~
$ LATITUDE <dbl> 1.283510, 1.280940, 1.310070, 1.277219, 1.~
$ LONGTITUDE <dbl> 103.8444, 103.8476, 103.8994, 103.8373, 10~
$ META_DESCRIPTION <chr> "At the Chinatown Heritage Centre, experie~
$ OPENING_HOURS <chr> "Daily, 9am – 8pm,Last entry at 7pm.*China~
$ Lat <dbl> 1.283510, 1.280940, 1.310070, 1.277219, 1.~
$ Lng <dbl> 103.8444, 103.8476, 103.8994, 103.8373, 10~
$ ICON_NAME <chr> "tourist_spot.gif", "tourist_spot.gif", "t~
$ ADDRESSPOSTALCODE <dbl> NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA,~
After using the glimpse() function on all the aspatial data, there are a few observations:
There are 2 geographic coordinates in the tourism dataframe
We can see that the LONGTITUDE and LATITUDE columns contains missing values but the Lat and Lng columns contains no missing values.
tourism[(is.na(tourism$LONGTITUDE) | tourism$LONGTITUDE=="" | is.na(tourism$LATITUDE) | tourism$LATITUDE == ""), ]
# A tibble: 1 x 17
NAME DESCRIPTION ADDRESSSTREETNAME HYPERLINK PHOTOURL URL_PATH
<chr> <chr> <chr> <chr> <chr> <chr>
1 Cruis~ Watch the Si~ <NA> <NA> www.your~ www.your~
# ... with 11 more variables: IMAGE_ALT_TEXT <chr>,
# PHOTOCREDITS <chr>, LASTMODIFIED <dttm>, LATITUDE <dbl>,
# LONGTITUDE <dbl>, META_DESCRIPTION <chr>, OPENING_HOURS <chr>,
# Lat <dbl>, Lng <dbl>, ICON_NAME <chr>, ADDRESSPOSTALCODE <dbl>
Although Crusises from Singapore is a tourism activity, it does not belong to a specific location on the map. Therefore we will remove it from the dataframe. This is because having 0,0 coordinates can many different meanings in Geospatial Data
Check if the row is removed from the dataframe
tourism[(is.na(tourism$LONGTITUDE) | tourism$LONGTITUDE=="" | is.na(tourism$LATITUDE) | tourism$LATITUDE == ""), ]
# A tibble: 0 x 17
# ... with 17 variables: NAME <chr>, DESCRIPTION <chr>,
# ADDRESSSTREETNAME <chr>, HYPERLINK <chr>, PHOTOURL <chr>,
# URL_PATH <chr>, IMAGE_ALT_TEXT <chr>, PHOTOCREDITS <chr>,
# LASTMODIFIED <dttm>, LATITUDE <dbl>, LONGTITUDE <dbl>,
# META_DESCRIPTION <chr>, OPENING_HOURS <chr>, Lat <dbl>,
# Lng <dbl>, ICON_NAME <chr>, ADDRESSPOSTALCODE <dbl>
Steps Taken:
june_2019_sf <- st_as_sf(june_2019, coords = c("longitude", "latitude"), crs = 4326) %>%
st_transform(crs=3414)
june_2021_sf <- st_as_sf(june_2021, coords = c("longitude", "latitude"), crs = 4326) %>%
st_transform(crs=3414)
hotels_sf <- st_as_sf(hotels, coords = c("Lng", "Lat"), crs = 4326) %>%
st_transform(crs=3414)
tourism_sf <- st_as_sf(tourism, coords = c("Lng", "Lat"), crs = 4326) %>%
st_transform(crs=3414)
tmap_mode('view')
tm_shape(mpsz_sf) +
tm_polygons() +
tm_shape(tourism_sf) +
tm_dots(col="red", size=0.1)
tmap_mode('plot')
```{.r .distill-force-highlighting-css}